Introdução

Conjunto de dados utilizado

Análise exploratória de dados do IMDB sobre seriados de TV e Streaming. Os dados originais e as variáveis vêm deste repositório: https://github.com/nazareno/imdb-series. Lá consta a explicação de como os dados foram gerados e do significado de cada variável.

# Load the IMDB episode table from the project's data directory.
# series_name, episode, url and season are read as text; every other
# column falls back to double through `.default`.
episodes <- read_csv(
    here("data/series_from_imdb.csv"),
    col_types = cols(
        series_name = col_character(),
        episode = col_character(),
        url = col_character(),
        season = col_character(),
        .default = col_double()
    ),
    progress = FALSE
)
# Print a compact, column-by-column overview of what was loaded.
glimpse(episodes)
Observations: 32,070
Variables: 18
$ series_name <chr> "13 Reasons Why", "13 Reasons Why", "13 Reasons Why", "13 Reasons Why", "13 Reasons Why", "13 ...
$ episode     <chr> "Tape 1, Side A", "Tape 1, Side B", "Tape 2, Side A", "Tape 2, Side B", "Tape 3, Side A", "Tap...
$ series_ep   <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ...
$ season      <chr> "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",...
$ season_ep   <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ...
$ url         <chr> "http://www.imdb.com/title/tt5174246/", "http://www.imdb.com/title/tt5174248/", "http://www.im...
$ user_rating <dbl> 8.5, 8.2, 8.1, 8.3, 8.5, 8.3, 8.6, 8.4, 8.9, 8.8, 9.3, 9.2, 9.4, 8.6, 8.3, 8.2, 8.7, 8.4, 8.6,...
$ user_votes  <dbl> 3661, 3009, 2784, 2658, 2617, 2491, 2548, 2436, 2507, 2490, 3403, 2898, 4053, 2174, 1603, 1471...
$ r1          <dbl> 0.04143948, 0.04176334, 0.04446038, 0.05065666, 0.05718643, 0.05128205, 0.05951449, 0.06022122...
$ r2          <dbl> 0.003816794, 0.003646006, 0.003226963, 0.002251407, 0.002668700, 0.004407051, 0.004306969, 0.0...
$ r3          <dbl> 0.0032715376, 0.0046403712, 0.0046611689, 0.0030018762, 0.0022874571, 0.0020032051, 0.00313234...
$ r4          <dbl> 0.004634678, 0.006297647, 0.008246683, 0.005253283, 0.006099886, 0.010016026, 0.007830854, 0.0...
$ r5          <dbl> 0.011177754, 0.013258204, 0.019361778, 0.016510319, 0.013343500, 0.014823718, 0.013312451, 0.0...
$ r6          <dbl> 0.031079607, 0.036460060, 0.043743277, 0.038273921, 0.033930614, 0.045673077, 0.028582616, 0.0...
$ r7          <dbl> 0.09133043, 0.13059330, 0.13302259, 0.11031895, 0.09264201, 0.10576923, 0.08026625, 0.09954937...
$ r8          <dbl> 0.20692475, 0.27842227, 0.28002868, 0.25628518, 0.20243995, 0.26322115, 0.16679718, 0.21630479...
$ r9          <dbl> 0.2764449, 0.2031820, 0.1724632, 0.2112570, 0.2436142, 0.1875000, 0.2411903, 0.2105694, 0.2751...
$ r10         <dbl> 0.3298800, 0.2817368, 0.2907852, 0.3061914, 0.3457873, 0.3153045, 0.3950666, 0.3478083, 0.4432...
# Attach the per-(series, season) summary columns to every episode row, then
# flag the episodes whose position in the season is strictly between p20 and
# p80 (both bounds excluded).
# NOTE(review): `sumario_simples` is not created in any chunk shown here;
# presumably it supplies the `p20` and `p80` columns -- confirm where it is
# built, otherwise this chunk fails on a fresh session.
episodes <- episodes %>%
    left_join(sumario_simples, by = c("series_name", "season")) %>%
    group_by(series_name, season) %>%
    mutate(middle_eps = season_ep > p20 & season_ep < p80) %>%
    ungroup()
episodes
 # Box plots of user ratings: one box (and one colour) per series, one panel
 # per season. The x axis title, labels and ticks are hidden, so the colour
 # legend is the only key to which box belongs to which series.
 p <- ggplot(
     episodes,
     aes(x = series_name, y = user_rating, color = series_name)
 ) +
     geom_boxplot(position = position_dodge(width = 0.9)) +
     facet_wrap(~ season) +
     theme(
         axis.ticks.x = element_blank(),
         axis.text.x = element_blank(),
         axis.title.x = element_blank()
     )
# Convert the static ggplot into an interactive plotly widget.
ggplotly(p)
We recommend that you use the dev version of ggplot2 with `ggplotly()`
Install it with: `devtools::install_github('hadley/ggplot2')`
# Box plots of user ratings per series, coloured by the `middle_eps` flag and
# split into one panel per season. The axes are flipped so series names run
# along the vertical axis.
p <- ggplot(
    episodes,
    aes(x = series_name, y = user_rating, color = middle_eps)
) +
    facet_wrap(~ season) +
    geom_boxplot(alpha = 0.3) +
    coord_flip()
# Convert the static ggplot into an interactive plotly widget.
ggplotly(p)
We recommend that you use the dev version of ggplot2 with `ggplotly()`
Install it with: `devtools::install_github('hadley/ggplot2')`
LS0tCnRpdGxlOiAiRURBIEludGVyYXRpdmEgc29icmUgc8OpcmllcyBubyBJTURCIgpzdWJ0aXRsZTogJycKYXV0aG9yOiAiSm9zw6kgQmVuYXJkaSBkZSBTb3V6YSBOdW5lcyIKb3V0cHV0OgogIGh0bWxfZG9jdW1lbnQ6CiAgICBkZl9wcmludDogcGFnZWQKICAgIHRvYzogeWVzCiAgICB0b2NfZmxvYXQ6IHllcwogIGh0bWxfbm90ZWJvb2s6CiAgICB0b2M6IHllcwogICAgdG9jX2Zsb2F0OiB5ZXMKLS0tCgo8YnI+PC9icj4KCiMjIEludHJvZHXDp8OjbwoKIyMjIENvbmp1bnRvIGRlIGRhZG9zIHV0aWxpemFkbwoKPiBBbsOhbGlzZSBleHBsb3JhdMOzcmlhIGRlIGRhZG9zIGRvIFtJTURCXShodHRwczovL3d3dy5pbWRiLmNvbS8pICBzb2JyZSBzZXJpYWRvcyBkZSBUViBlIFN0cmVhbWluZy4gT3MgZGFkb3Mgb3JpZ2luYWlzIGUgYXMgdmFyacOhdmVpcyB2w6ptIFtkZXN0ZSByZXBvc2l0b3Jpb10oaHR0cHM6Ly9naXRodWIuY29tL25hemFyZW5vL2ltZGItc2VyaWVzKSAuIEzDoSBjb25zdGEgYSBleHBsaWNhw6fDo28gZGUgY29tbyBvcyBkYWRvcyBmb3JhbSBnZXJhZG9zIGUgZG8gc2lnbmlmaWNhZG8gZGUgY2FkYSB2YXJpw6F2ZWwuCgpgYGB7ciBzZXR1cCwgZWNobz1GQUxTRSwgd2FybmluZz1GQUxTRSwgbWVzc2FnZT1GQUxTRX0KbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkoaGVyZSkKbGlicmFyeShwbG90bHkpCnRoZW1lX3NldCh0aGVtZV9idygpKQpgYGAKCmBgYHtyfQplcGlzb2RlcyA8LSByZWFkX2NzdihoZXJlKCJkYXRhL3Nlcmllc19mcm9tX2ltZGIuY3N2IiksIAogICAgICAgICAgICAgICAgICAgIHByb2dyZXNzID0gRkFMU0UsCiAgICAgICAgICAgICAgICAgICAgY29sX3R5cGVzID0gY29scyguZGVmYXVsdCA9IGNvbF9kb3VibGUoKSwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBzZXJpZXNfbmFtZSA9IGNvbF9jaGFyYWN0ZXIoKSwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBlcGlzb2RlID0gY29sX2NoYXJhY3RlcigpLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHVybCA9IGNvbF9jaGFyYWN0ZXIoKSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHNlYXNvbiA9IGNvbF9jaGFyYWN0ZXIoKSkpIAplcGlzb2RlcyAlPiUgCiAgICBnbGltcHNlKCkKYGBgCgpgYGB7cn0KZXBpc29kZXMgPC0gbGVmdF9qb2luKGVwaXNvZGVzLCBzdW1hcmlvX3NpbXBsZXMsCiAgICAgICAgICAgICAgICAgICAgICBieSA9IGMoInNlcmllc19uYW1lIiwgInNlYXNvbiIpKSAlPiUgCiAgICBncm91cF9ieShzZXJpZXNfbmFtZSwgc2Vhc29uKSAlPiUKICAgIG11dGF0ZShtaWRkbGVfZXBzID0gKHNlYXNvbl9lcCA+IHAyMCkgJgogICAgICAgICAgICAgICAoc2Vhc29uX2VwIDwgcDgwKSkgJT4lIAogICAgdW5ncm91cCgpCmVwaXNvZGVzCmBgYAoKCmBgYHtyfQogcCA8LSBlcGlzb2RlcyAlPiUgCiAgICAgICAgICBnZ3Bsb3QoYWVzKHggPSBz
ZXJpZXNfbmFtZSwKICAgICAgICAgICAgICAgICB5ID0gdXNlcl9yYXRpbmcsCiAgICAgICAgICAgICAgICAgY29sb3I9c2VyaWVzX25hbWUpKSArIAogICAgICAgIGZhY2V0X3dyYXAofiBzZWFzb24pICsgCiAgICAgICAgZ2VvbV9ib3hwbG90KHBvc2l0aW9uID0gcG9zaXRpb25fZG9kZ2Uod2lkdGggPSAwLjkpKSArCiAgICAgICAgdGhlbWUoYXhpcy50aXRsZS54PWVsZW1lbnRfYmxhbmsoKSwKICAgICAgICAgICAgICBheGlzLnRleHQueD1lbGVtZW50X2JsYW5rKCksCiAgICAgICAgICAgICAgYXhpcy50aWNrcy54PWVsZW1lbnRfYmxhbmsoKSkKCmdncGxvdGx5KHApCmBgYAoKYGBge3J9CnAgPC0gIGVwaXNvZGVzICU+JSAKICAgICAgZ2dwbG90KGFlcyh4ID0gc2VyaWVzX25hbWUsIHkgPSB1c2VyX3JhdGluZywgY29sb3I9bWlkZGxlX2VwcykpICsgCiAgICAgICAgZ2VvbV9ib3hwbG90KGFscGhhID0wLjMpICsKICAgICAgICBmYWNldF93cmFwKH4gc2Vhc29uKSArCiAgICAgICAgY29vcmRfZmxpcCgpCgpnZ3Bsb3RseShwKQpgYGAK